package wvtools.extend;

import edu.udo.cs.wvtool.generic.vectorcreation.WVTVectorCreator;
import edu.udo.cs.wvtool.main.WVTDocumentInfo;
import edu.udo.cs.wvtool.main.WVTWordVector;
import edu.udo.cs.wvtool.util.WVToolException;
import edu.udo.cs.wvtool.wordlist.WVTWordList;

/**
 * Vector creator that weights every word of the word list by TF-IDF and
 * scales the resulting vector to Euclidean unit length.
 */
public class TFIDFExtend implements WVTVectorCreator
{
    /**
     * Builds the TF-IDF vector of a single document.
     *
     * <p>For each word {@code i} of the word list the weight is
     * {@code (frequencies[i] / numTermOccurences) * log(numDocuments / docFrequencies[i])}.
     * The vector is then divided by its Euclidean length, unless that length
     * is zero.
     *
     * <p>Entries whose term frequency is zero, or whose document frequency is
     * not positive, are left at {@code 0.0}. If the document has no term
     * occurrences, or the word list reports no documents, the whole vector is
     * zero.
     *
     * @param frequencies       term counts of the document, indexed like the
     *                          document frequencies of {@code wordList}
     * @param numTermOccurences total number of term occurrences in the document
     * @param wordList          word list supplying the number of documents and
     *                          the per-word document frequencies
     * @param d                 document info attached to the resulting vector
     * @return a word vector holding the normalized TF-IDF weights
     * @throws WVToolException declared by the {@code WVTVectorCreator} contract
     */
    @Override
    public WVTWordVector createVector(int[] frequencies, int numTermOccurences,
            WVTWordList wordList, WVTDocumentInfo d) throws WVToolException
    {
        int numDocuments = wordList.getNumDocuments();
        int[] docFrequencies = wordList.getDocumentFrequencies();

        // A freshly allocated double[] is already filled with 0.0, so the
        // "no terms" case needs no explicit zeroing.
        double[] wv = new double[docFrequencies.length];

        if (numTermOccurences > 0 && numDocuments > 0)
        {
            double squaredSum = 0.0;
            for (int i = 0; i < wv.length; i++)
            {
                // A zero term frequency gives a zero weight anyway. A
                // non-positive document frequency would make the idf
                // infinite and turn the weight (and, through the length,
                // the normalization) into NaN, so both cases stay at 0.0.
                if (frequencies[i] == 0 || docFrequencies[i] <= 0)
                {
                    continue;
                }

                double tf = ((double) frequencies[i]) / ((double) numTermOccurences);
                double idf = Math.log(((double) numDocuments)
                        / ((double) docFrequencies[i]));

                wv[i] = tf * idf;
                squaredSum += wv[i] * wv[i];
            }

            double length = Math.sqrt(squaredSum);

            // Normalize to unit length; an all-zero vector is left untouched.
            if (length > 0.0)
            {
                for (int i = 0; i < wv.length; i++)
                {
                    wv[i] = wv[i] / length;
                }
            }
        }

        WVTWordVector result = new WVTWordVector();
        result.setDocumentInfo(d);
        result.setValues(wv);

        return result;
    }

}
